QUESTION 1:
a.)
import pandas as pd
import numpy as np
import seaborn as sns
import seaborn as sb
import matplotlib.pyplot as plt
import statsmodels.api as sm
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from statsmodels.stats.outliers_influence import variance_inflation_factor
import math
from sklearn.preprocessing import MinMaxScaler
import warnings
warnings.filterwarnings("ignore")
b.)
# Load the dataset from 'compactiv.csv' (path is relative to the working directory).
df = pd.read_csv('compactiv.csv')
# Preview the first five rows, transposed so every column reads as one row.
df.head().T
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| lread | 1 | 0 | 15 | 0 | 5 |
| lwrite | 0 | 0 | 3 | 0 | 1 |
| scall | 2147 | 170 | 2162 | 160 | 330 |
| sread | 79 | 18 | 159 | 12 | 39 |
| swrite | 68 | 21 | 119 | 16 | 38 |
| fork | 0.2 | 0.2 | 2.0 | 0.2 | 0.4 |
| exec | 0.2 | 0.2 | 2.4 | 0.2 | 0.4 |
| rchar | 40671.0 | 448.0 | NaN | NaN | NaN |
| wchar | 53995.0 | 8385.0 | 31950.0 | 8670.0 | 12185.0 |
| pgout | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ppgout | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| pgfree | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| pgscan | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| atch | 0.0 | 0.0 | 1.2 | 0.0 | 0.0 |
| pgin | 1.6 | 0.0 | 6.0 | 0.2 | 1.0 |
| ppgin | 2.6 | 0.0 | 9.4 | 0.2 | 1.2 |
| pflt | 16.0 | 15.63 | 150.2 | 15.6 | 37.8 |
| vflt | 26.4 | 16.83 | 220.2 | 16.8 | 47.6 |
| runqsz | CPU_Bound | Not_CPU_Bound | Not_CPU_Bound | Not_CPU_Bound | Not_CPU_Bound |
| freemem | 4670 | 7278 | 702 | 7248 | 633 |
| freeswap | 1730946 | 1869002 | 1021237 | 1863704 | 1760253 |
| usr | 95 | 97 | 87 | 98 | 90 |
c.)
df.shape
(8192, 22)
df.info()
<class 'pandas.core.frame.DataFrame'> RangeIndex: 8192 entries, 0 to 8191 Data columns (total 22 columns): # Column Non-Null Count Dtype --- ------ -------------- ----- 0 lread 8192 non-null int64 1 lwrite 8192 non-null int64 2 scall 8192 non-null int64 3 sread 8192 non-null int64 4 swrite 8192 non-null int64 5 fork 8192 non-null float64 6 exec 8192 non-null float64 7 rchar 8088 non-null float64 8 wchar 8177 non-null float64 9 pgout 8192 non-null float64 10 ppgout 8192 non-null float64 11 pgfree 8192 non-null float64 12 pgscan 8192 non-null float64 13 atch 8192 non-null float64 14 pgin 8192 non-null float64 15 ppgin 8192 non-null float64 16 pflt 8192 non-null float64 17 vflt 8192 non-null float64 18 runqsz 8192 non-null object 19 freemem 8192 non-null int64 20 freeswap 8192 non-null int64 21 usr 8192 non-null int64 dtypes: float64(13), int64(8), object(1) memory usage: 1.4+ MB
# Drop exact duplicate rows. ignore_index=True renumbers the index 0..n-1 so it
# has no gaps; code further down rebuilds a frame from a NumPy array (fresh
# RangeIndex) and then copies a column across by index label, which only lines
# up when df's index is contiguous.
df.drop_duplicates(inplace=True, ignore_index=True)
df.dtypes
lread int64 lwrite int64 scall int64 sread int64 swrite int64 fork float64 exec float64 rchar float64 wchar float64 pgout float64 ppgout float64 pgfree float64 pgscan float64 atch float64 pgin float64 ppgin float64 pflt float64 vflt float64 runqsz object freemem int64 freeswap int64 usr int64 dtype: object
df.isnull().sum()
lread 0 lwrite 0 scall 0 sread 0 swrite 0 fork 0 exec 0 rchar 104 wchar 15 pgout 0 ppgout 0 pgfree 0 pgscan 0 atch 0 pgin 0 ppgin 0 pflt 0 vflt 0 runqsz 0 freemem 0 freeswap 0 usr 0 dtype: int64
QUESTION 2:
a.)
df.describe().T
| count | mean | std | min | 25% | 50% | 75% | max | |
|---|---|---|---|---|---|---|---|---|
| lread | 8192.0 | 1.955969e+01 | 53.353799 | 0.0 | 2.0 | 7.0 | 20.000 | 1845.00 |
| lwrite | 8192.0 | 1.310620e+01 | 29.891726 | 0.0 | 0.0 | 1.0 | 10.000 | 575.00 |
| scall | 8192.0 | 2.306318e+03 | 1633.617322 | 109.0 | 1012.0 | 2051.5 | 3317.250 | 12493.00 |
| sread | 8192.0 | 2.104800e+02 | 198.980146 | 6.0 | 86.0 | 166.0 | 279.000 | 5318.00 |
| swrite | 8192.0 | 1.500582e+02 | 160.478980 | 7.0 | 63.0 | 117.0 | 185.000 | 5456.00 |
| fork | 8192.0 | 1.884554e+00 | 2.479493 | 0.0 | 0.4 | 0.8 | 2.200 | 20.12 |
| exec | 8192.0 | 2.791998e+00 | 5.212456 | 0.0 | 0.2 | 1.2 | 2.800 | 59.56 |
| rchar | 8088.0 | 1.973857e+05 | 239837.493526 | 278.0 | 34091.5 | 125473.5 | 267828.750 | 2526649.00 |
| wchar | 8177.0 | 9.590299e+04 | 140841.707911 | 1498.0 | 22916.0 | 46619.0 | 106101.000 | 1801623.00 |
| pgout | 8192.0 | 2.285317e+00 | 5.307038 | 0.0 | 0.0 | 0.0 | 2.400 | 81.44 |
| ppgout | 8192.0 | 5.977229e+00 | 15.214590 | 0.0 | 0.0 | 0.0 | 4.200 | 184.20 |
| pgfree | 8192.0 | 1.191971e+01 | 32.363520 | 0.0 | 0.0 | 0.0 | 5.000 | 523.00 |
| pgscan | 8192.0 | 2.152685e+01 | 71.141340 | 0.0 | 0.0 | 0.0 | 0.000 | 1237.00 |
| atch | 8192.0 | 1.127505e+00 | 5.708347 | 0.0 | 0.0 | 0.0 | 0.600 | 211.58 |
| pgin | 8192.0 | 8.277960e+00 | 13.874978 | 0.0 | 0.6 | 2.8 | 9.765 | 141.20 |
| ppgin | 8192.0 | 1.238859e+01 | 22.281318 | 0.0 | 0.6 | 3.8 | 13.800 | 292.61 |
| pflt | 8192.0 | 1.097938e+02 | 114.419221 | 0.0 | 25.0 | 63.8 | 159.600 | 899.80 |
| vflt | 8192.0 | 1.853158e+02 | 191.000603 | 0.2 | 45.4 | 120.4 | 251.800 | 1365.00 |
| freemem | 8192.0 | 1.763456e+03 | 2482.104511 | 55.0 | 231.0 | 579.0 | 2002.250 | 12027.00 |
| freeswap | 8192.0 | 1.328126e+06 | 422019.426957 | 2.0 | 1042623.5 | 1289289.5 | 1730379.500 | 2243187.00 |
| usr | 8192.0 | 8.396887e+01 | 18.401905 | 0.0 | 81.0 | 89.0 | 94.000 | 99.00 |
b.)
# Median of every numeric column. numeric_only=True skips the object column
# 'runqsz', which has no median; recent pandas versions raise a TypeError
# instead of silently dropping it.
print("df:", df.median(numeric_only=True))
df: lread 7.0 lwrite 1.0 scall 2051.5 sread 166.0 swrite 117.0 fork 0.8 exec 1.2 rchar 125473.5 wchar 46619.0 pgout 0.0 ppgout 0.0 pgfree 0.0 pgscan 0.0 atch 0.0 pgin 2.8 ppgin 3.8 pflt 63.8 vflt 120.4 freemem 579.0 freeswap 1289289.5 usr 89.0 dtype: float64
pd.DataFrame({'value': df['rchar'], 'Missing?': df['rchar'].isnull()})
| value | Missing? | |
|---|---|---|
| 0 | 40671.0 | False |
| 1 | 448.0 | False |
| 2 | NaN | True |
| 3 | NaN | True |
| 4 | NaN | True |
| ... | ... | ... |
| 8187 | 405250.0 | False |
| 8188 | 89489.0 | False |
| 8189 | 325948.0 | False |
| 8190 | 62571.0 | False |
| 8191 | 111111.0 | False |
8192 rows × 2 columns
# Impute missing 'rchar' values with the column median. The result is assigned
# back to the column: calling fillna(inplace=True) on df['rchar'] mutates an
# intermediate Series and is not guaranteed to update df itself.
df['rchar'] = df['rchar'].fillna(df['rchar'].median())
df
| lread | lwrite | scall | sread | swrite | fork | exec | rchar | wchar | pgout | ... | pgscan | atch | pgin | ppgin | pflt | vflt | runqsz | freemem | freeswap | usr | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 2147 | 79 | 68 | 0.2 | 0.20 | 40671.0 | 53995.0 | 0.00 | ... | 0.00 | 0.0 | 1.60 | 2.60 | 16.00 | 26.40 | CPU_Bound | 4670 | 1730946 | 95 |
| 1 | 0 | 0 | 170 | 18 | 21 | 0.2 | 0.20 | 448.0 | 8385.0 | 0.00 | ... | 0.00 | 0.0 | 0.00 | 0.00 | 15.63 | 16.83 | Not_CPU_Bound | 7278 | 1869002 | 97 |
| 2 | 15 | 3 | 2162 | 159 | 119 | 2.0 | 2.40 | 125473.5 | 31950.0 | 0.00 | ... | 0.00 | 1.2 | 6.00 | 9.40 | 150.20 | 220.20 | Not_CPU_Bound | 702 | 1021237 | 87 |
| 3 | 0 | 0 | 160 | 12 | 16 | 0.2 | 0.20 | 125473.5 | 8670.0 | 0.00 | ... | 0.00 | 0.0 | 0.20 | 0.20 | 15.60 | 16.80 | Not_CPU_Bound | 7248 | 1863704 | 98 |
| 4 | 5 | 1 | 330 | 39 | 38 | 0.4 | 0.40 | 125473.5 | 12185.0 | 0.00 | ... | 0.00 | 0.0 | 1.00 | 1.20 | 37.80 | 47.60 | Not_CPU_Bound | 633 | 1760253 | 90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8187 | 16 | 12 | 3009 | 360 | 244 | 1.6 | 5.81 | 405250.0 | 85282.0 | 8.02 | ... | 55.11 | 0.6 | 35.87 | 47.90 | 139.28 | 270.74 | CPU_Bound | 387 | 986647 | 80 |
| 8188 | 4 | 0 | 1596 | 170 | 146 | 2.4 | 1.80 | 89489.0 | 41764.0 | 3.80 | ... | 0.20 | 0.8 | 3.80 | 4.40 | 122.40 | 212.60 | Not_CPU_Bound | 263 | 1055742 | 90 |
| 8189 | 16 | 5 | 3116 | 289 | 190 | 0.6 | 0.60 | 325948.0 | 52640.0 | 0.40 | ... | 0.00 | 0.4 | 28.40 | 45.20 | 60.20 | 219.80 | Not_CPU_Bound | 400 | 969106 | 87 |
| 8190 | 32 | 45 | 5180 | 254 | 179 | 1.2 | 1.20 | 62571.0 | 29505.0 | 1.40 | ... | 18.04 | 0.4 | 23.05 | 24.25 | 93.19 | 202.81 | CPU_Bound | 141 | 1022458 | 83 |
| 8191 | 2 | 0 | 985 | 55 | 46 | 1.6 | 4.80 | 111111.0 | 22256.0 | 0.00 | ... | 0.00 | 0.2 | 3.40 | 6.20 | 91.80 | 110.00 | CPU_Bound | 659 | 1756514 | 94 |
8192 rows × 22 columns
pd.DataFrame({'value': df['wchar'], 'Missing?': df['wchar'].isnull()})
| value | Missing? | |
|---|---|---|
| 0 | 53995.0 | False |
| 1 | 8385.0 | False |
| 2 | 31950.0 | False |
| 3 | 8670.0 | False |
| 4 | 12185.0 | False |
| ... | ... | ... |
| 8187 | 85282.0 | False |
| 8188 | 41764.0 | False |
| 8189 | 52640.0 | False |
| 8190 | 29505.0 | False |
| 8191 | 22256.0 | False |
8192 rows × 2 columns
# Impute missing 'wchar' values with the column median. The result is assigned
# back to the column: calling fillna(inplace=True) on df['wchar'] mutates an
# intermediate Series and is not guaranteed to update df itself.
df['wchar'] = df['wchar'].fillna(df['wchar'].median())
df
| lread | lwrite | scall | sread | swrite | fork | exec | rchar | wchar | pgout | ... | pgscan | atch | pgin | ppgin | pflt | vflt | runqsz | freemem | freeswap | usr | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1 | 0 | 2147 | 79 | 68 | 0.2 | 0.20 | 40671.0 | 53995.0 | 0.00 | ... | 0.00 | 0.0 | 1.60 | 2.60 | 16.00 | 26.40 | CPU_Bound | 4670 | 1730946 | 95 |
| 1 | 0 | 0 | 170 | 18 | 21 | 0.2 | 0.20 | 448.0 | 8385.0 | 0.00 | ... | 0.00 | 0.0 | 0.00 | 0.00 | 15.63 | 16.83 | Not_CPU_Bound | 7278 | 1869002 | 97 |
| 2 | 15 | 3 | 2162 | 159 | 119 | 2.0 | 2.40 | 125473.5 | 31950.0 | 0.00 | ... | 0.00 | 1.2 | 6.00 | 9.40 | 150.20 | 220.20 | Not_CPU_Bound | 702 | 1021237 | 87 |
| 3 | 0 | 0 | 160 | 12 | 16 | 0.2 | 0.20 | 125473.5 | 8670.0 | 0.00 | ... | 0.00 | 0.0 | 0.20 | 0.20 | 15.60 | 16.80 | Not_CPU_Bound | 7248 | 1863704 | 98 |
| 4 | 5 | 1 | 330 | 39 | 38 | 0.4 | 0.40 | 125473.5 | 12185.0 | 0.00 | ... | 0.00 | 0.0 | 1.00 | 1.20 | 37.80 | 47.60 | Not_CPU_Bound | 633 | 1760253 | 90 |
| ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... | ... |
| 8187 | 16 | 12 | 3009 | 360 | 244 | 1.6 | 5.81 | 405250.0 | 85282.0 | 8.02 | ... | 55.11 | 0.6 | 35.87 | 47.90 | 139.28 | 270.74 | CPU_Bound | 387 | 986647 | 80 |
| 8188 | 4 | 0 | 1596 | 170 | 146 | 2.4 | 1.80 | 89489.0 | 41764.0 | 3.80 | ... | 0.20 | 0.8 | 3.80 | 4.40 | 122.40 | 212.60 | Not_CPU_Bound | 263 | 1055742 | 90 |
| 8189 | 16 | 5 | 3116 | 289 | 190 | 0.6 | 0.60 | 325948.0 | 52640.0 | 0.40 | ... | 0.00 | 0.4 | 28.40 | 45.20 | 60.20 | 219.80 | Not_CPU_Bound | 400 | 969106 | 87 |
| 8190 | 32 | 45 | 5180 | 254 | 179 | 1.2 | 1.20 | 62571.0 | 29505.0 | 1.40 | ... | 18.04 | 0.4 | 23.05 | 24.25 | 93.19 | 202.81 | CPU_Bound | 141 | 1022458 | 83 |
| 8191 | 2 | 0 | 985 | 55 | 46 | 1.6 | 4.80 | 111111.0 | 22256.0 | 0.00 | ... | 0.00 | 0.2 | 3.40 | 6.20 | 91.80 | 110.00 | CPU_Bound | 659 | 1756514 | 94 |
8192 rows × 22 columns
df.isin([0]).sum()
lread 675 lwrite 2684 scall 0 sread 0 swrite 0 fork 21 exec 21 rchar 0 wchar 0 pgout 4878 ppgout 4878 pgfree 4869 pgscan 6448 atch 4575 pgin 1220 ppgin 1220 pflt 3 vflt 0 runqsz 0 freemem 0 freeswap 0 usr 283 dtype: int64
c.)
# Partition the column names by dtype: object-typed columns are treated as
# categorical, every other column as numeric.
nums = [col for col in df.columns if df[col].dtype != 'O']
cats = [col for col in df.columns if df[col].dtype == 'O']
print(nums)
print(cats)
['lread', 'lwrite', 'scall', 'sread', 'swrite', 'fork', 'exec', 'rchar', 'wchar', 'pgout', 'ppgout', 'pgfree', 'pgscan', 'atch', 'pgin', 'ppgin', 'pflt', 'vflt', 'freemem', 'freeswap', 'usr'] ['runqsz']
# One box plot per numeric column, each on its own figure, to inspect outliers.
for col in nums:
    plt.figure(figsize=(5, 10))
    # Pass the series as x= explicitly: a positional argument is interpreted
    # differently across seaborn versions (as x in older releases, as data in
    # newer ones), which changes the plot orientation.
    sns.boxplot(x=df[col])
    plt.show()
def remove_outlier(column, data=None):
    """Return the IQR outlier fences ``(lower, upper)`` for one column.

    Despite its name, this function removes nothing; it only computes the
    bounds ``Q1 - 1.5 * IQR`` and ``Q3 + 1.5 * IQR`` that the caller can use
    to cap or filter values.

    Parameters:
        column: label of the column to analyse.
        data: frame holding the column. When omitted, the module-level
            ``df`` is used, which is what existing callers rely on.

    Returns:
        A ``(lower, upper)`` tuple of fence values.
    """
    # The original called sorted(column) here; that sorted the characters of
    # the column *name* and threw the result away, so it has been dropped.
    frame = df if data is None else data
    q1 = frame[column].quantile(0.25)
    q3 = frame[column].quantile(0.75)
    iqr = q3 - q1
    lower = q1 - 1.5 * iqr
    upper = q3 + 1.5 * iqr
    return lower, upper
# Cap each numeric column at its IQR fences. The last entry of nums is skipped
# (per the printed list that entry is 'usr').
for col in nums[:-1]:
    low, high = remove_outlier(col)
    # Values below the lower fence become the fence, values above the upper
    # fence become that fence, everything else is kept unchanged.
    df[col] = np.where(
        df[col] < low,
        low,
        np.where(df[col] > high, high, df[col]),
    )
# Re-draw the box plots (with the mean marked) to check the columns after
# outlier capping.
for col in nums:
    # Pass the series as x= explicitly: a positional argument is interpreted
    # differently across seaborn versions (as x in older releases, as data in
    # newer ones), which changes the plot orientation.
    sns.boxplot(x=df[col], showmeans=True)
    plt.show()
d) Scaling & Normalization
# Rescale the listed numeric columns with MinMaxScaler (default settings).
scaler = MinMaxScaler()
columns=['lread','lwrite','scall','sread','swrite','fork','exec','rchar','wchar','pgout','ppgout','pgscan','pgfree','atch','pgin','ppgin','pflt','vflt','freemem','freeswap','usr']
scaled_values = scaler.fit_transform(df[columns].to_numpy())
# fit_transform returns a bare array, so carry df's index over explicitly.
# Without it the new frame gets a fresh 0..n-1 index and the label-aligned
# 'runqsz' assignment below would pair rows incorrectly (or insert NaN)
# whenever df's index is not contiguous, e.g. after rows were dropped.
df_scaled = pd.DataFrame(scaled_values, columns=columns, index=df.index)
print("Scaled Dataset Using MinMaxScaler")
# Re-attach the categorical column, which was excluded from scaling.
df_scaled['runqsz'] = df['runqsz']
df_scaled.head().T
Scaled Dataset Using MinMaxScaler
| 0 | 1 | 2 | 3 | 4 | |
|---|---|---|---|---|---|
| lread | 0.021277 | 0.0 | 0.319149 | 0.0 | 0.106383 |
| lwrite | 0.0 | 0.0 | 0.12 | 0.0 | 0.04 |
| scall | 0.305725 | 0.009151 | 0.307975 | 0.007651 | 0.033153 |
| sread | 0.129778 | 0.021333 | 0.272 | 0.010667 | 0.058667 |
| swrite | 0.168975 | 0.038781 | 0.310249 | 0.024931 | 0.085873 |
| fork | 0.040816 | 0.040816 | 0.408163 | 0.040816 | 0.081633 |
| exec | 0.029851 | 0.029851 | 0.358209 | 0.029851 | 0.059701 |
| rchar | 0.066119 | 0.000278 | 0.20493 | 0.20493 | 0.20493 |
| wchar | 0.229117 | 0.030057 | 0.132904 | 0.031301 | 0.046642 |
| pgout | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| ppgout | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| pgscan | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| pgfree | 0.0 | 0.0 | 0.0 | 0.0 | 0.0 |
| atch | 0.0 | 0.0 | 0.8 | 0.0 | 0.0 |
| pgin | 0.068049 | 0.0 | 0.255183 | 0.008506 | 0.042531 |
| ppgin | 0.077381 | 0.0 | 0.279762 | 0.005952 | 0.035714 |
| pflt | 0.04426 | 0.043237 | 0.415491 | 0.043154 | 0.104564 |
| vflt | 0.046686 | 0.029633 | 0.392017 | 0.029579 | 0.084462 |
| freemem | 1.0 | 1.0 | 0.140526 | 1.0 | 0.12554 |
| freeswap | 0.770522 | 0.832369 | 0.45258 | 0.829996 | 0.783651 |
| usr | 0.959596 | 0.979798 | 0.878788 | 0.989899 | 0.909091 |
| runqsz | CPU_Bound | Not_CPU_Bound | Not_CPU_Bound | Not_CPU_Bound | Not_CPU_Bound |
e.)
df_scaled.runqsz.unique()
array(['CPU_Bound', 'Not_CPU_Bound'], dtype=object)
# One-hot encode 'runqsz'. The dtype is pinned so the indicator columns are
# 0/1 integers, as in the displayed output; newer pandas versions default to
# booleans.
# NOTE(review): both indicator columns are kept and always sum to 1, so they
# are perfectly collinear - confirm that one is dropped before fitting a
# linear model with an intercept.
df = pd.get_dummies(df, prefix='CPU_Bound', columns=['runqsz'], dtype=np.uint8)
df.head()
| lread | lwrite | scall | sread | swrite | fork | exec | rchar | wchar | pgout | ... | atch | pgin | ppgin | pflt | vflt | freemem | freeswap | usr | CPU_Bound_CPU_Bound | CPU_Bound_Not_CPU_Bound | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 0 | 1.0 | 0.0 | 2147.0 | 79.0 | 68.0 | 0.2 | 0.2 | 40671.0 | 53995.0 | 0.0 | ... | 0.0 | 1.6 | 2.6 | 16.00 | 26.40 | 4659.125 | 1730946.0 | 95 | 1 | 0 |
| 1 | 0.0 | 0.0 | 170.0 | 18.0 | 21.0 | 0.2 | 0.2 | 448.0 | 8385.0 | 0.0 | ... | 0.0 | 0.0 | 0.0 | 15.63 | 16.83 | 4659.125 | 1869002.0 | 97 | 0 | 1 |
| 2 | 15.0 | 3.0 | 2162.0 | 159.0 | 119.0 | 2.0 | 2.4 | 125473.5 | 31950.0 | 0.0 | ... | 1.2 | 6.0 | 9.4 | 150.20 | 220.20 | 702.000 | 1021237.0 | 87 | 0 | 1 |
| 3 | 0.0 | 0.0 | 160.0 | 12.0 | 16.0 | 0.2 | 0.2 | 125473.5 | 8670.0 | 0.0 | ... | 0.0 | 0.2 | 0.2 | 15.60 | 16.80 | 4659.125 | 1863704.0 | 98 | 0 | 1 |
| 4 | 5.0 | 1.0 | 330.0 | 39.0 | 38.0 | 0.4 | 0.4 | 125473.5 | 12185.0 | 0.0 | ... | 0.0 | 1.0 | 1.2 | 37.80 | 47.60 | 633.000 | 1760253.0 | 90 | 0 | 1 |
5 rows × 23 columns
QUESTION 3:
a.)
# Univariate analysis: histogram with a KDE overlay on a density scale, one
# figure per column. histplot(kde=True, stat='density') is the replacement for
# the deprecated sns.distplot, and the loop replaces twenty copy-pasted calls.
# NOTE(review): 'vflt' was not plotted in the original cells and is therefore
# not listed here - confirm whether that omission is intentional.
univariate_columns = [
    'lread', 'lwrite', 'scall', 'sread', 'swrite', 'fork', 'exec',
    'rchar', 'wchar', 'pgout', 'ppgout', 'pgfree', 'pgscan', 'atch',
    'pgin', 'ppgin', 'pflt', 'freemem', 'freeswap', 'usr',
]
for col in univariate_columns:
    plt.figure()
    sns.histplot(df[col], kde=True, stat='density')
    plt.show()
We have performed univariate analysis for each variable (the plots are in the HTML file). lread is positively skewed and has a greater frequency. lwrite is similar to lread, but it is less frequent. In all of the graphs we can see a single peak at the beginning, then a drop that stays roughly constant, and then a sudden rise as well. For pgscan, we can see there is no data to be distributed.
# Pairwise scatter plots of the scaled data, with KDE curves on the diagonal.
# seaborn renamed pairplot's `size` argument to `height`; the old name is no
# longer accepted by current releases.
sns.pairplot(df_scaled, diag_kind='kde', height=3)
plt.show()